import plotly.offline as py
from plotly.graph_objs import *
import plotly.graph_objs as go
import numpy as np
import pandas as pd
import warnings
from plotly.tools import FigureFactory as ff
import colorlover as cl
import skbio
from skbio.alignment import global_pairwise_align_nucleotide
from skbio.sequence import DNA
import pandas as pd
import itertools
import numpy as np
from skbio.sequence.distance import hamming
from skbio.stats.distance import DistanceMatrix
from scipy.spatial.distance import pdist, squareform
import plotly.express as px
warnings.filterwarnings('ignore')
trace1 = {
"uid": "ec0c5770-e86c-4231-9350-c8a5424aeeef",
"mode": "markers",
"type": "scatter",
"x": [-2.330078922787911, -2.0578559805954546, -1.8851770324320443, -1.75530050130824, -1.649672679353478, -1.5597799921032536, -1.480972651368176, -1.4104195313382353, -1.3462626652319192, -1.2872137328173294, -1.2323408611117501, -1.1809470407966427, -1.132496529618965, -1.086568114986069, -1.0428242390384295, -1.0009899168818812, -0.9608379310031608, -0.9221781782775869, -0.8848498412982431, -0.8487155274221447, -0.8136568081151939, -0.7795707737384843, -0.746367337187046, -0.713967098197978, -0.6822996332113872, -0.6513021122615636, -0.6209181700422963, -0.5910969765728049, -0.5617924660992517, -0.5329626925342973, -0.5045692868981857, -0.4765769975882356, -0.44895329836199876, -0.4216680520195305, -0.3946932201593915, -0.3680026112393317, -0.341571660625871, -0.3153772374626603, -0.28939747409646166, -0.2636116145249011, -0.23799987891187127, -0.212543341685061, -0.18722382110885646, -0.162023778532748, -0.13692622576424984, -0.11191463921698815, -0.08697287964740107, -0.06208511642395301, -0.03723575537593384, -0.012409369348679852, 0.012409369348679852, 0.03723575537593384, 0.06208511642395301, 0.0869728796474012, 0.11191463921698828, 0.13692622576424998, 0.16202377853274785, 0.1872238211088563, 0.21254334168506087, 0.23799987891187127, 0.2636116145249011, 0.28939747409646166, 0.3153772374626603, 0.341571660625871, 0.3680026112393317, 0.3946932201593915, 0.4216680520195306, 0.448953298361999, 0.4765769975882358, 0.5045692868981856, 0.5329626925342971, 0.5617924660992515, 0.5910969765728049, 0.6209181700422963, 0.6513021122615636, 0.6822996332113872, 0.713967098197978, 0.746367337187046, 0.7795707737384843, 0.8136568081151939, 0.848715527422145, 0.8848498412982432, 0.9221781782775866, 0.9608379310031608, 1.0009899168818812, 1.0428242390384295, 1.086568114986069, 1.132496529618965, 1.1809470407966427, 1.2323408611117501, 1.2872137328173294, 1.3462626652319198, 1.4104195313382353, 1.480972651368176, 1.5597799921032531, 1.6496726793534775, 1.75530050130824, 1.8851770324320436, 2.0578559805954537, 2.3300789227879104],
"y": [39.34143945759198, 40.02803115129471, 40.11135859671047, 40.22743939999248, 40.71907259694426, 41.28313852050546, 42.27299853944366, 42.458392564183924, 42.90721985388303, 44.31698894084478, 44.60097557107362, 44.77433731242781, 45.17467164741618, 45.30283320118724, 45.97704428865023, 46.00995591163878, 46.230535196261506, 46.34015248999396, 46.399572196405515, 46.51094984764143, 47.2534549294726, 47.30060464333649, 47.35351959500256, 47.49135550202566, 47.553313915576744, 47.5895343058226, 47.61928992536331, 48.18750408422179, 48.1890977642888, 48.24564054280064, 48.30429876968282, 48.31183831457235, 48.6437600601711, 48.663414056632156, 48.83908872027955, 48.886131504485, 48.92105053834739, 48.93651180644744, 49.1269989470353, 49.2628987113649, 49.39047154419056, 49.4386376440292, 49.59438908091527, 49.670760761443994, 49.69043985253798, 49.75584744330624, 49.87047331591907, 49.95808075035739, 50.021457154670166, 50.44793806294758, 50.49574607917622, 50.54274262857485, 50.66354147879976, 50.67568439224318, 50.97506639616547, 51.01790398663739, 51.0863257255902, 51.1431506506233, 51.22271988323995, 51.3275579284606, 51.33035082000276, 51.44547101901968, 51.56084968156611, 51.57376889405867, 51.59678210545612, 51.966706085408696, 52.00104994126174, 52.16513094976799, 52.22568806415174, 52.304514508358594, 52.82576334828314, 53.06602092429819, 53.10300331504307, 53.1066798694524, 53.301157756131566, 53.54080009963302, 53.559795082599194, 53.57639487199203, 54.134993113311324, 54.21112369191328, 54.587294687552145, 54.59134575739708, 54.9453622874972, 55.14137038991352, 55.23091428404132, 55.538541173913245, 55.618456267047115, 55.64392576554938, 56.01518686906106, 56.28236131877578, 56.54236540431739, 56.65793252064759, 56.98998188546788, 57.42268500918291, 58.36311106653914, 58.72407074169651, 59.92542295712866, 61.92483665355549, 61.97351832442268, 62.338255282173016],
"marker": {"color": "#19d3f3"}
}
trace2 = {
"uid": "d1dcb3a4-a6c7-44f3-b911-c59c66739751",
"line": {"color": "#636efa"},
"mode": "lines",
"type": "scatter",
"x": [-2.330078922787911, -2.0578559805954546, -1.8851770324320443, -1.75530050130824, -1.649672679353478, -1.5597799921032536, -1.480972651368176, -1.4104195313382353, -1.3462626652319192, -1.2872137328173294, -1.2323408611117501, -1.1809470407966427, -1.132496529618965, -1.086568114986069, -1.0428242390384295, -1.0009899168818812, -0.9608379310031608, -0.9221781782775869, -0.8848498412982431, -0.8487155274221447, -0.8136568081151939, -0.7795707737384843, -0.746367337187046, -0.713967098197978, -0.6822996332113872, -0.6513021122615636, -0.6209181700422963, -0.5910969765728049, -0.5617924660992517, -0.5329626925342973, -0.5045692868981857, -0.4765769975882356, -0.44895329836199876, -0.4216680520195305, -0.3946932201593915, -0.3680026112393317, -0.341571660625871, -0.3153772374626603, -0.28939747409646166, -0.2636116145249011, -0.23799987891187127, -0.212543341685061, -0.18722382110885646, -0.162023778532748, -0.13692622576424984, -0.11191463921698815, -0.08697287964740107, -0.06208511642395301, -0.03723575537593384, -0.012409369348679852, 0.012409369348679852, 0.03723575537593384, 0.06208511642395301, 0.0869728796474012, 0.11191463921698828, 0.13692622576424998, 0.16202377853274785, 0.1872238211088563, 0.21254334168506087, 0.23799987891187127, 0.2636116145249011, 0.28939747409646166, 0.3153772374626603, 0.341571660625871, 0.3680026112393317, 0.3946932201593915, 0.4216680520195306, 0.448953298361999, 0.4765769975882358, 0.5045692868981856, 0.5329626925342971, 0.5617924660992515, 0.5910969765728049, 0.6209181700422963, 0.6513021122615636, 0.6822996332113872, 0.713967098197978, 0.746367337187046, 0.7795707737384843, 0.8136568081151939, 0.848715527422145, 0.8848498412982432, 0.9221781782775866, 0.9608379310031608, 1.0009899168818812, 1.0428242390384295, 1.086568114986069, 1.132496529618965, 1.1809470407966427, 1.2323408611117501, 1.2872137328173294, 1.3462626652319198, 1.4104195313382353, 1.480972651368176, 1.5597799921032531, 1.6496726793534775, 1.75530050130824, 1.8851770324320436, 2.0578559805954537, 2.3300789227879104],
"y": [39.130691799051924, 40.44694345743266, 41.28188025613511, 41.90985891987164, 42.4205902884306, 42.855239174804616, 43.23628822269047, 43.57742648420294, 43.88763759546647, 44.173150839098106, 44.43847200490756, 44.68697127052955, 44.92123905550531, 45.1433120058616, 45.35482228047826, 45.55709948559664, 45.75124226274281, 45.93816979804823, 46.11865967541837, 46.293376215401395, 46.46289204333575, 46.62770474763841, 46.78824991724382, 46.944911468095555, 47.0980299120063, 47.247909044261995, 47.394821402243856, 47.53901275894208, 47.68070585136917, 47.82010349713697, 47.95739121783608, 48.09273946192363, 48.22630550020206, 48.35823505198176, 48.48866368846805, 48.617718050937555, 48.745516914241854, 48.87217212063709, 48.99778940454302, 49.12246912532807, 49.24630692240406, 49.36939430464876, 49.491819184342674, 49.61366636432289, 49.7350179858533, 49.85595394373554, 49.97655227439789, 50.09688952206855, 50.21704108764098, 50.33708156445307, 50.45708506491566, 50.57712554172775, 50.69727710730018, 50.81761435497085, 50.93821268563319, 51.059148643515435, 51.18050026504584, 51.30234744502606, 51.42477232471997, 51.54785970696467, 51.671697504040665, 51.79637722482571, 51.92199450873164, 52.04864971512688, 52.17644857843118, 52.30550294090068, 52.435931577386974, 52.56786112916667, 52.701427167445104, 52.83677541153265, 52.97406313223176, 53.11346077799956, 53.25515387042665, 53.399345227124876, 53.54625758510674, 53.696136717362435, 53.84925516127318, 54.005916712124915, 54.16646188173032, 54.33127458603298, 54.50079041396734, 54.67550695395036, 54.8559968313205, 55.042924366625925, 55.237067143772094, 55.43934434889047, 55.65085462350713, 55.87292757386342, 56.107195358839185, 56.35569462446117, 56.621015790270626, 56.90652903390227, 57.216740145165794, 57.55787840667826, 57.938927454564116, 58.37357634093813, 58.884307709497094, 59.51228637323362, 60.347223171936065, 61.6634748303168]
}
data = Data([trace1, trace2])
layout = {
"title": {"text": "Quantile-Quantile Plot"},
"width": 800,
"xaxis": {
"title": {"text": "Theoritical Quantities"},
"zeroline": False
},
"yaxis": {"title": {"text": "Sample Quantities"}},
"height": 700,
"showlegend": False
}
fig = Figure(data=data, layout=layout)
plot_url = py.iplot(fig)
f = 'seqs_quals.fastq'
seqs = skbio.io.read(f, format='fastq', verify=False, variant='illumina1.8')
seq1 = seqs.__next__()
#seq1
#seq1.positional_metadata.quality[:10]
seqs = skbio.io.read(f, format='fastq', verify=False, variant='illumina1.8')
df = pd.DataFrame()
num_sequences = 500
for count, seq in enumerate(itertools.islice(seqs, num_sequences)):
df[count] = seq.positional_metadata.quality
purd = cl.scales['11']['div']['RdYlBu']
purd40 = cl.interp(purd, 40)
traces = []
for e in range(len(df)):
traces.append(go.Box(
y=df.iloc[e].values,
name=e,
boxpoints='outliers',
whiskerwidth=0.2,
marker=dict(
size=.1,
color=purd40[int(round(df.iloc[e].mean(), 0))]
),
line=dict(width=1),
))
layout = go.Layout(
title='Quality Score Distributions',
yaxis=dict(
title='Quality Score',
autorange=True,
showgrid=True,
zeroline=True,
gridcolor='#d9d4d3',
zerolinecolor='#d9d4d3',
),
xaxis=dict(
title='Base Position',
),
font=dict(family='Times New Roman', size=16, color='#2e1c18'),
paper_bgcolor='#eCe9e9',
plot_bgcolor='#eCe9e9'
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='quality-scores')
seqs = [DNA(e) for e in itertools.islice(skbio.io.read(f, format='fastq', variant='illumina1.8'), 2)]
aligned_seqs = global_pairwise_align_nucleotide(seqs[0], seqs[1])
msa = skbio.alignment.TabularMSA.read('msa10.fna', constructor=DNA)
base_dic = {'A': 1, 'C': .25, 'G': .5, 'T': .75, '-': 0}
def seq_align_for_plot(msa):
base_text = [list(str(e)) for e in msa]
base_values = np.zeros((len(base_text), len(base_text[0])))
for i in range(len(base_text[0])):
for j in range(len(base_text)):
if base_text[j][i] != base_text[0][i]:
base_values[j][i] = base_dic[base_text[j][i]]
return(base_text, base_values)
base_text, base_values = seq_align_for_plot(msa)
colorscale=[[0.00, '#F4F0E4'],
[0.25, '#1b9e77'],
[0.50, '#d95f02'],
[0.75, '#7570b3'],
[1.00, '#e7298a']]
seq_names = ["Seq " + str(e + 1) for e in range(len(base_text))]
fig = ff.create_annotated_heatmap(base_values,
annotation_text=base_text,
colorscale=colorscale)
fig['layout'].update(
title="Aligned Sequences",
xaxis=dict(ticks='',
side='top',
ticktext=list(np.arange(0, len(base_text[0]), 10)),
tickvals=list(np.arange(0, len(base_text[0]), 10)),
showticklabels=True,
tickfont=dict(family='Bookman',
size=18,
color='#22293B',
),
),
yaxis=dict(autorange='reversed',
ticks='',
ticksuffix=' ',
ticktext=seq_names,
tickvals=list(np.arange(0, len(base_text))),
showticklabels=True,
tickfont=dict(family='Bookman',
size=18,
color='#22293B',
),
),
width=10000,
height=450,
autosize=True,
annotations=dict(font=dict(family='Courier New, monospace',
size=14,
color='#3f566d'
),
)
)
py.iplot(fig, filename='msa')
hamming_dm = DistanceMatrix.from_iterable(msa, metric=hamming, key='id')
colorscale = [[0.0,'#4bd4d1'], [0.5, '#1c9099']] # custom colorscale
['#ece2f0','#a6bddb','#1c9099']
trace = go.Heatmap(x=seq_names, y=seq_names, z=hamming_dm.data, colorscale='Viridis')
fig = go.Figure(data=[trace])
fig['layout'].update(
xaxis=dict(ticks=''),
yaxis=dict(ticks='', ticksuffix=' '),
width=700,
height=700,
autosize=False,
margin=go.Margin(
l=200,
b=200,
pad=4
)
)
py.iplot(fig, filename='dm_heatmap')
hamming_array = hamming_dm.data + .001
np.fill_diagonal(hamming_array, 0)
names = hamming_dm.ids
group1 = ['Seq 1', 'Seq 7', 'Seq 9', 'Seq 3']
group2 = ['Seq 2', 'Seq 4', 'Seq 5', 'Seq 6', 'Seq 8']
names = ["Seq " + str(e + 1) for e in range(len(hamming_array))]
fig = ff.create_dendrogram(hamming_array.data,
orientation='right',
labels=names)
fig['layout'].update(
width=500,
height=700,
)
fig['layout']['yaxis'].update(dict(side='right',
showline=False,
ticks='',
zeroline=True,
zerolinewidth=4,
zerolinecolor='#969696')
)
fig['layout']['xaxis'].update(dict(showline=False,
showticklabels=True,
ticktext=np.array(['0.0', '0.2', '0.4', '0.6', '0.8']),
tickvals=[-0.8, -0.6, -0.4, -0.2, -0.0],
tickfont=dict(color='#969696'),
mirror=None,
ticklen=8,
tickwidth=4,
tickcolor='#969696',
range=[-0.9, 0.1])
)
tips = []
for i in fig['data']:
if 0 in i['x']:
i['marker']['color'] = '#ffbe4f'
else:
i['marker']['color'] = '#0c457d'
for i in range(len(names)):
if fig['layout']['yaxis']['ticktext'][i] in group1:
color = '#e8702a'
symbol = 'circle'
else:
color = '#0ea7b5'
symbol = 'triangle-left'
x = [-0.0]
y = [fig['layout']['yaxis']['tickvals'][i]]
trace = go.Scatter(x=x, y=y,
mode='markers',
marker=dict(
size=10,
color=color,
symbol=symbol)
)
fig.add_trace(trace)
py.iplot(fig, filename='phylo-dendrogram')
# Dendrogram with a Heatmap
# get data
data = np.genfromtxt("http://files.figshare.com/2133304/ExpRawData_E_TABM_84_A_AFFY_44.tab",
names=True,usecols=tuple(range(1,30)),dtype=float, delimiter="\t")
data_array = data.view((np.float, len(data.dtype.names)))
data_array = data_array.transpose()
labels = data.dtype.names
# Initialize figure by creating upper dendrogram
fig = ff.create_dendrogram(data_array, orientation='bottom', labels=labels)
for i in range(len(fig['data'])):
fig['data'][i]['yaxis'] = 'y2'
# Create Side Dendrogram
dendro_side = ff.create_dendrogram(data_array, orientation='right')
for i in range(len(dendro_side['data'])):
dendro_side['data'][i]['xaxis'] = 'x2'
# Add Side Dendrogram Data to Figure
for data in dendro_side['data']:
fig.add_trace(data)
# Create Heatmap
dendro_leaves = dendro_side['layout']['yaxis']['ticktext']
dendro_leaves = list(map(int, dendro_leaves))
data_dist = pdist(data_array)
heat_data = squareform(data_dist)
heat_data = heat_data[dendro_leaves,:]
heat_data = heat_data[:,dendro_leaves]
heatmap = [
go.Heatmap(
x = dendro_leaves,
y = dendro_leaves,
z = heat_data,
colorscale = 'Blues'
)
]
heatmap[0]['x'] = fig['layout']['xaxis']['tickvals']
heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals']
# Add Heatmap Data to Figure
for data in heatmap:
fig.add_trace(data)
# Edit Layout
fig.update_layout({'width':800, 'height':800,
'showlegend':False, 'hovermode': 'closest',
})
# Edit xaxis
fig.update_layout(xaxis={'domain': [.15, 1],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'ticks':""})
# Edit xaxis2
fig.update_layout(xaxis2={'domain': [0, .15],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks':""})
# Edit yaxis
fig.update_layout(yaxis={'domain': [0, .85],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks': ""
})
# Edit yaxis2
fig.update_layout(yaxis2={'domain':[.825, .975],
'mirror': False,
'showgrid': False,
'showline': False,
'zeroline': False,
'showticklabels': False,
'ticks':""})
# Plot!
fig.show()
# 3D scatter plot with plotly express
import plotly.express as px
iris = px.data.iris()
fig = px.scatter_3d(iris, x='sepal_length', y='sepal_width', z='petal_width',
color='species')
fig.show()